# This script pull census data and crime data for SitD

gc()
rm(list=ls())

# Parameters -------------------------------------------------------------------

  # Packages and libraries
  packages     = c("formattable", "here", "rgdal", "rjson", 
                   "tidycensus", "lubridate", "sf", "tidyverse", "RSocrata")
  lapply(packages, library, character.only = TRUE)
  
  # Census key and definitions
  census_api_key("29370f111959851fb3c988d3542f7985d36b1147")
  blocks       = "https://data.cityofchicago.org/resource/bt9m-d2mf.csv"
  districts    = "https://data.cityofchicago.org/resource/24zt-jpfn.csv"
  
  # Source for crime data
  data_chi     = "https://data.cityofchicago.org/api"
    crime_2018 = "views/3i3m-jwuy/rows.csv?accessType=DOWNLOAD"
    crime_2019 = "views/w98m-zvie/rows.csv?accessType=DOWNLOAD"
    crime_2020 = "views/qzdf-xmn8/rows.csv?accessType=DOWNLOAD"
  

# Cleaning geography data ------------------------------------------------------

    # Reading geography
    blocks_temp =  read.socrata(blocks) %>%
      dplyr::rename(geometry = the_geom) %>%
      st_as_sf(wkt = "geometry",crs=4326) %>% st_transform(crs = 3435) 

    block_groups_temp = blocks_temp %>%
      dplyr::mutate(GEOID = paste(str_sub(geoid10,0,12))) %>%
      dplyr::group_by(GEOID) %>%
      dplyr::summarise(geometry = st_union(geometry))

    districts_temp <- read.socrata(districts) %>%
      dplyr::rename(geometry = the_geom) %>%
      st_as_sf(wkt = "geometry",crs=4326) %>% st_transform(crs = 3435)

    # Convert geometry - making a crosswalk from block group to district
    block_groups_districts = st_join(block_groups_temp, districts_temp, 
                             join = "st_intercepts", left = F, largest = T)
    
    intersect_pct = st_intersection(block_groups_temp, districts_temp) %>% 
      mutate(intersect_area = st_area(.)) %>%   # create new column with shape area
      dplyr::select(GEOID, dist_num, intersect_area) %>%   # only select columns needed to merge
      st_drop_geometry()  # drop geometry as we don't need it
    
    bg_area = mutate(block_groups_temp, bg_area=st_area(block_groups_temp)) %>%
             select(GEOID, bg_area) %>% st_drop_geometry() 
    
    bg_area_coverage = merge(intersect_pct, bg_area, by="GEOID", all.y=TRUE) %>%
             mutate(coverage = as.numeric(100* intersect_area/bg_area))
    

# Cleaning crime data ----------------------------------------------------------

    # Download Crime data from Chicago data portal
    crime2018 =  read.csv(paste(data_chi, crime_2018, sep = "/"))
    crime2019 =  read.csv(paste(data_chi, crime_2019, sep = "/"))
    crime2020 =  read.csv(paste(data_chi, crime_2020, sep = "/"))
    
    # Filter to crimes within the pre-period of SitD [01/2018-01/2020]
    sitD_violent_def    = c("01A", "02", "03", "04A", "04B")
    
    crimes    = 
      rbind(crime2018, crime2019, crime2020) %>%
      mutate(date = as.Date(as.POSIXct(Date, format="%m/%d/%Y"))) %>%
      filter(between(date, as.Date('2018-01-01'), as.Date('2020-01-31'))) %>%
      mutate(all = 1, violent = as.numeric(FBI.Code %in% sitD_violent_def))
    
    # Collapse data into district
    crimes_collapse = crimes %>% group_by(District) %>%
      summarize(all_crimes = sum(all), violent_crimes = sum(violent))
    
# Useful function --------------------------------------------------------------

  # Draw ACS data
  get_acs_recent = function(yyy, vars) {
    return(get_acs(geography = "block group", state = "IL",
                   county = "Cook", variables = vars,
                   year = yyy, output = "wide"))
  }

  # Some calculation
  data_cleaning = function(df){
    df %>% mutate(
      District = District,
      p_emp_unemployed = B23025_005E / B23025_003E,
      p_poverty        = B17021_002E / B17021_001E,
      p_assistance     = B19057_002E / B19057_001E,
      p_female_head    = B11001_006E / B11001_001E,
      p_under18        = (B01001_003E + B01001_004E + B01001_005E + B01001_006E +
                            B01001_027E + B01001_028E + B01001_029E + B01001_030E)
      / B01001_001E,
      p_black          = B02001_003E / B02001_001E,
      p_hispanic       = B03003_003E / B03003_001E,
    )
  }

# Years 2011-2018 --------------------------------------------------------------

# List of variables to pull

acs_2011_2018 =  c(
  "B01001_001E", # population
  'B17021_001E', 'B17021_002E', # poverty
  'B15002_011E', 'B15002_028E', 'B15002_001E', # education
  'B11001_006E', 'B11001_001E', # head-of-household (woman head)
  'B19057_002E', 'B19057_001E', # Assistance
  'B01001_003E', 'B01001_004E','B01001_005E','B01001_006E', # Males under 18
  'B01001_027E', 'B01001_028E','B01001_029E','B01001_030E', # Females under 18
  'B02001_003E', 'B02001_001E', # Black
  'B03003_003E', 'B03003_001E', # Hispanic
  'B23025_005E', 'B23025_003E', # employment
  'B03002_004E', 'B03002_012E'
)

# ACS on recent years
acs_data = get_acs_recent(2019, acs_2011_2018)

chi_block_groups = 
  acs_data %>% 
  right_join(block_groups_districts, by = "GEOID") %>%
  select(-geometry, -NAME, -dist_label) %>%
  mutate(city = "Chicago")

chi_district = 
  aggregate(chi_block_groups[,2:(ncol(chi_block_groups)-1)], 
            by = list(District = chi_block_groups$dist_num), 
            function(chi_block_groups) sum(chi_block_groups, na.rm = TRUE)) 

# Indexing
results = chi_district %>% data_cleaning() %>%
  left_join(crimes_collapse, by ="District")

write.csv(results,"FILEPATH-CENSUS")


